---
title: Title
keywords: fastai
sidebar: home_sidebar
nb_path: "network_embeding.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class SemanticNetwork[source]

SemanticNetwork(nneigh, min_sim, model)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
from gensim.models.doc2vec import Doc2Vec
{% endraw %} {% raw %}
# Load a previously saved Doc2Vec model from the file 'zika.d2v'.
model = Doc2Vec.load('zika.d2v')
{% endraw %} {% raw %}
# Positional arguments follow the SemanticNetwork(nneigh, min_sim, model)
# signature: nneigh=10, min_sim=0.7.
# NOTE(review): presumably nneigh is a neighbour count and min_sim a minimum
# similarity cut-off for edges -- confirm against the class implementation.
SN = SemanticNetwork(10,0.7,model)
{% endraw %} {% raw %}
# `vecs` is reused by the clustering, network-growing, clustermap and pdist
# cells further down the page.
# NOTE(review): presumably one embedding vector per document -- confirm.
vecs = SN.get_vectors()
{% endraw %} {% raw %}
# Agglomerative clustering of the vectors; `clust` is not referenced again in
# the cells shown on this page.
clust = SN.get_agglomerative_cluster(vecs)
{% endraw %} {% raw %}
# Build the network from the vectors; the graph is read back later through
# `SN.net`. The two shape tuples that follow are this cell's recorded output.
SN.grow_network(vecs)
(33558528,)
(8193, 8193)
{% endraw %} {% raw %}
# Inspect the edge data stored between nodes 16 and 100 of the network.
SN.net[16][100]
AtlasView({0: {'weight': 0.7948908195795175}})
{% endraw %} {% raw %}
# Detect communities by greedy modularity maximisation, using the 'weight'
# edge attribute as the edge weight.
# NOTE(review): the `n_communities` keyword is only accepted by some networkx
# releases -- confirm against the installed version before re-running.
communities = greedy_modularity_communities(SN.net, weight='weight', n_communities=100, resolution=3)
# Alternative community-detection algorithms, left disabled:
# communities = [c for c in girvan_newman(SN.net)]
# communities = [c for c in asyn_lpa_communities(SN.net, weight='weight')]
# Report how many communities were found and the size of the first one.
print(f'Found {len(communities)} communities')
print(len(communities[0]))
# Histogram of community sizes, skipping single-node communities; the trailing
# semicolon keeps the notebook from echoing the return value of plt.hist.
plt.hist([len(c) for c in communities if len(c)>1], bins=10);
Found 8039 communities
53
{% endraw %} {% raw %}
{% endraw %} {% raw %}

plot_network[source]

plot_network(G)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
# Plot only the subgraph induced by the nodes of the first detected community.
plot_network(SN.net.subgraph(communities[0]))
{% endraw %}

Hierarchical clustering

{% raw %}
import seaborn as sns 
{% endraw %} {% raw %}
# Clustered heatmap of the vectors using complete linkage on cosine distance.
# `g` is kept because a later cell reads the row linkage from
# `g.dendrogram_row.linkage`; the trailing semicolon suppresses the notebook's
# echo of the returned object.
g = sns.clustermap(vecs, method='complete', metric='cosine', figsize =(15,35));
{% endraw %}

Extracting clusters from the dendrogram

{% raw %}

get_clusters[source]

get_clusters(linkage, dists, threshold=0.9)

Returns the cluster ID for every document in the corpus.

{% endraw %} {% raw %}
{% endraw %} {% raw %}
# Reuse the row linkage already computed by the clustermap instead of
# clustering the vectors a second time.
doc_linkage = g.dendrogram_row.linkage
# Pairwise cosine distances between the vectors.
dists = pdist(vecs,metric='cosine')
# Cut at threshold 0.6, overriding get_clusters' default of 0.9.
cs = get_clusters(doc_linkage,dists, 0.6)
# Tally how many documents fall into each cluster id.
Counter(cs)
Counter({16: 53,
         35: 385,
         33: 212,
         6: 131,
         73: 1279,
         37: 148,
         79: 124,
         4: 161,
         69: 64,
         57: 85,
         26: 154,
         8: 275,
         14: 186,
         39: 89,
         15: 40,
         64: 158,
         66: 85,
         31: 167,
         32: 266,
         25: 265,
         54: 140,
         44: 42,
         27: 244,
         2: 248,
         41: 106,
         58: 107,
         18: 178,
         23: 80,
         59: 113,
         21: 236,
         77: 141,
         43: 110,
         62: 30,
         9: 75,
         34: 164,
         75: 59,
         42: 91,
         78: 131,
         7: 45,
         61: 33,
         45: 39,
         13: 69,
         76: 51,
         46: 70,
         3: 111,
         30: 56,
         24: 49,
         53: 139,
         19: 148,
         38: 35,
         67: 54,
         20: 21,
         36: 44,
         11: 24,
         29: 50,
         1: 34,
         28: 15,
         47: 20,
         81: 32,
         72: 25,
         71: 53,
         80: 18,
         68: 36,
         40: 21,
         70: 62,
         74: 51,
         63: 12,
         10: 43,
         17: 25,
         5: 19,
         55: 12,
         22: 16,
         12: 16,
         56: 5,
         60: 2,
         65: 5,
         50: 3,
         48: 2,
         49: 4,
         52: 1,
         51: 1})
{% endraw %} {% raw %}
# Draw the dendrogram for the same linkage on a single 15x10 axes.
fig, ax = plt.subplots(1,1, figsize =(15,10))
# color_threshold=0.6 matches the threshold passed to get_clusters in the
# earlier cell.
den = dendrogram(doc_linkage,color_threshold=0.6, ax=ax)
{% endraw %} {% raw %}

{% endraw %}